* Start from an empty session: no data, no Mata objects, no matrices in memory.
clear
clear mata
clear matrix

cd ..                                                                            /* Goes back to parent folder */

* Close any log left open by a previous run, then start this script's log.
capture log close
log using "log/03_Maps.log", replace

* Scratch folder for intermediate files. It has to be the RELATIVE folder "tmp"
* because every later path in this script ("tmp/...") and the final rmdir are
* relative to the project root; "/tmp" would point at the filesystem root.
* capture: do not stop if the folder is still there from an interrupted run.
capture mkdir "tmp"

*       Patriotism!

*       FIRST VERSION  June       3, 2022
*       THIS VERSION   June       9, 2022
*       LAST RUN       June       9, 2022

*       LAST REVISOR		BC

*       Log of revisions:

*       This prepares all the geographical files to be used in Stata

********************************************************************************
****                      PLAN OF THE PROCEDURE                             ****
****                                                                        ****
****    1. Prepare the dtas for the whole US                                ****
****          a. Convert the shp into dtas                                  ****
****          b. Drop Alaska and Hawaii                                     ****
****                i. Contiguous Albers Equal Area Conic                   ****
****    2. Prepare the county databases                                     ****
****          a. Convert the shp into dtas                                  ****
****          b. Prepare the New Deal counties database                     ****
****          c. Prepare bridges >New Deal counties                         ****
****              i. Counties                                               ****
****             ii. Climatic divisions                                     ****
****            iii. Congressional districts                                ****
****          d. Prepare bridges >1940 (for the ASN individual-level file)  ****
****              i. Counties                                               ****
****             ii. Climatic divisions                                     ****
****    3. Service Command 7                                                ****
****    4. Erase junk                                                       ****
********************************************************************************

* Run the shared set-up script before any conversion (its contents are not part of this file).
do "code-build/01_Shopping_List.do"

********************************************************************************
****    1. Prepare the dtas for the whole US                                ****
****          a. Convert the shp into dtas                                  ****
********************************************************************************
* The attribute table goes to tmp/ (it is erased at the end of the script);
* the polygon coordinates are kept in data/.
shp2dta using "rawdata/maps/USA_CA.shp", database("tmp/USA_CA") coordinates("data/C-USA_CA") replace

********************************************************************************
****          b. Drop Alaska and Hawaii                                     ****
********************************************************************************
* Edit the coordinates file in place: load it, trim it, save it under the same name.
use "data/C-USA_CA", replace

* Keep the points at or below the _Y cut-off (Alaska lies above it) and at or
* to the right of the _X cut-off (Hawaii lies to the left of it). Rows whose
* coordinate is system missing (.) are kept as well.
keep if _Y <=  1850000 | _Y == .
keep if _X >= -4500000 | _X == .

save "data/C-USA_CA", replace

********************************************************************************
****    2. Prepare the county databases                                     ****
****          a. Convert the shp into dtas                                  ****
********************************************************************************
* New Deal counties in both projections: attribute tables and coordinates are kept in data/.
shp2dta using "rawdata/NHGIS/CountiesND.shp"      , data("data/CountiesND_CA"   ) coord("data/C-CountiesND_CA"   ) replace
shp2dta using "rawdata/NHGIS/CountiesND_WGS84.shp", data("data/CountiesND_WGS84") coord("data/C-CountiesND_WGS84") replace

* Intersections of the New Deal counties with the 1910/1940/1950 counties.
* The coordinates file is named with an underscore (C-CountiesND_<year>) so that
* it matches the name erased in the clean-up section at the end of the script.
foreach year in 1910 1940 1950 {
	shp2dta using "rawdata/NHGIS/CountiesND_`year'.shp", data("tmp/CountiesND_`year'") coord("tmp/C-CountiesND_`year'") replace
} /* foreach year */

* Intersections of the New Deal counties with the districts of the 62nd and 73rd Congress.
foreach congress in 62 73 {
	shp2dta using "rawdata/Lewis/CountiesND_`congress'.shp", database("tmp/Bridge-ND-DISTRICT_ID`congress'") coord("tmp/C-Bridge-ND-DISTRICT_ID`congress'") replace
} /* foreach congress */

* Intersections of the New Deal counties and of the 1940 counties with the climatic divisions.
shp2dta using "rawdata/NOAA/CountiesND_CD.shp"  , data("tmp/CountiesND_CD")   coord("tmp/C-CountiesND_CD")   replace
shp2dta using "rawdata/NOAA/Counties1940_CD.shp", data("tmp/Counties1940_CD") coord("tmp/C-Counties1940_CD") replace

********************************************************************************
****          b. Prepare the New Deal counties database                     ****
********************************************************************************
* Same clean-up for both versions of the New Deal counties file (CA and WGS84):
* rename the geometry variables, put the identifiers first, label, overwrite.
foreach proj in CA WGS84 {
	use "data/CountiesND_`proj'", replace

	* Area and centroid variables get short lower-case names
	rename (POLY_AREA CENTROID_X CENTROID_Y INSIDE_X INSIDE_Y) ///
	       (areand    lon_nd     lat_nd     lonx_nd  latx_nd )

	order state-countyname _ID

	label variable state        "State name (ICPSR)"
	label variable stateicpsr   "State code (ICPSR)"
	label variable countynd     "County code (Fishback, Kantor & Wallis 2003)"
	label variable countyname   "County name (Fishback, Kantor & Wallis 2003)"
	label variable _ID          "Geographic identifier (use with C-CountiesND_`proj')"
	label variable areand       "Area of New Deal county"
	label variable lon_nd       "Longitude of New Deal county centroid (`proj')"
	label variable lat_nd       "Latitude of New Deal county centroid (`proj')"
	label variable lonx_nd      "Longitude of New Deal county centroid - forced to be inside (`proj')"
	label variable latx_nd      "Latitude of New Deal county centroid - forced to be inside (`proj')"

	save "data/CountiesND_`proj'", replace
} /* foreach proj */

********************************************************************************
****          c. Prepare bridges >New Deal counties                         ****
****              i. Counties                                               ****
********************************************************************************
* One bridge per census year (1910, 1940, 1950): each row is an intersection
* polygon, weighted by its share of the summed polygon area of its ICPSR county.
foreach yy in 10 40 50 {
	use "tmp/CountiesND_19`yy'", replace

	egen     area       = total(POLY_AREA), by(state stateicpsr ICPSRCTYI)
	generate weight`yy' = POLY_AREA / area

	rename ICPSRCTYI countyicpsr`yy'
	keeporder stateicpsr countynd countyicpsr`yy' weight`yy'

	label variable stateicpsr       "State code (ICPSR)"
	label variable countynd         "County code (Fishback, Kantor & Wallis 2003)"
	label variable countyicpsr`yy'  "County code (ICPSR 19`yy')"
	label variable weight`yy'       "Weight: (area intersection 19`yy'-New Deal county / 19`yy' county area)"

	save "data/Bridge-countyicpsr19`yy'-countynd", replace
} /* foreach yy */

********************************************************************************
****             ii. Climatic divisions                                     ****
**** NOTE: there are 2 problems with the intersections between the New Deal ****
****       counties and the climatic divisions polygons. Namely:            ****
****                1. Some polygons of 2 or 3 different climatic divisions ****
****                   lie one on top of the other. This means that the     ****
****                   same polygon is assigned to the same county *twice*. ****
****                   To solve this problem I first identify these         ****
****                   polygons as those that have *exactly* the same area  ****
****                   in the same county. Next, I divide the area in these ****
****                   polygons by the number of times they are repeated    ****
****                   (usually 1 or 2 times). This effectively assigns     ****
****                   half (or 1/3) of the values from one climatic        ****
****                   division and half (or 1/3) from the other(s).        ****
****                2. Parts of some counties are not covered by any        ****
****                   climatic division. Here data effectively don't exist.****
****                   For these places I don't take any action. However,   ****
****                   when I calculate weights I use the total area of the ****
****                   county that is actually covered by at least 1        ****
****                   climatic division. So the weights always sum up to 1.****
********************************************************************************
use "tmp/CountiesND_CD", replace

* Problem 1: polygons lying on top of each other show up with exactly the same
* area inside the same county. Count how often each (county, area) pair occurs
* and split the area evenly across the repetitions.
egen overlapping_polygons = count(CLIMDIV), by(stateicpsr countynd countyname POLY_AREA)
replace POLY_AREA = POLY_AREA / overlapping_polygons

* Problem 2: the denominator is the part of the county that is covered by at
* least one climatic division, so the weights of a county add up to 1.
egen     area     = total(POLY_AREA), by(stateicpsr countynd countyname)
generate weightCD = POLY_AREA / area

keeporder stateicpsr countynd CLIMDIV weightCD

label variable stateicpsr   "State code (ICPSR)"
label variable countynd     "County code (Fishback, Kantor & Wallis 2003)"
label variable CLIMDIV      "Climatic division (NCDC)"
label variable weightCD     "Weight: (area intersection in climatic division / New Deal county area)"

save "data/Bridge-CLIMDIV-countynd", replace
 
********************************************************************************
****            iii. Congressional districts                                ****
********************************************************************************
* One bridge per Congress (62 and 73): weight of each county-district piece
* within its New Deal county, based on AREA_GEO.
foreach congress in 62 73 {
	use "tmp/Bridge-ND-DISTRICT_ID`congress'", clear
	destring DISTRICT, replace

	* Sum of AREA_GEO over all pieces of the same county, then each piece's share
	bysort countynd stateicpsr: egen county_area = total(AREA_GEO)
	generate WEIGHT = AREA_GEO / county_area
	keep WEIGHT countynd stateicpsr DISTRICT STATENAME _ID

	* Weights outside [0.00001, 0.99999] are rounded to the nearest 0.00001;
	* pieces whose weight is then exactly zero are discarded.
	replace WEIGHT = round(WEIGHT, 0.00001) if !inrange(WEIGHT, 0.00001, 0.99999)
	keep if WEIGHT != 0
	save "data/Bridge-ND-DISTRICT_ID`congress'.dta", replace
} /* foreach congress */

********************************************************************************
****          d. Prepare bridges >1940 (for the ASN individual-level file)  ****
****              i. Counties                                               ****
********************************************************************************
* Reverse direction of the 1940 bridge: here the denominator is the summed
* polygon area of the New Deal county, not of the 1940 county.
use "tmp/CountiesND_1940", replace

egen     area     = total(POLY_AREA), by(state stateicpsr countynd)
generate weightnd = POLY_AREA / area

rename ICPSRCTYI countyicpsr40
keeporder stateicpsr countynd countyicpsr40 weightnd

label variable stateicpsr     "State code (ICPSR)"
label variable countynd       "County code (Fishback, Kantor & Wallis 2003)"
label variable countyicpsr40  "County code (ICPSR 1940)"
label variable weightnd       "Weight: (area intersection New Deal-1940 county / New Deal county area)"

save "data/Bridge-countynd-countyicpsr1940", replace

********************************************************************************
****             ii. Climatic divisions                                     ****
**** NOTE: see above for issues with this bridge                            ****
********************************************************************************
use "tmp/Counties1940_CD", replace

rename countyicps countyicpsr40

* Polygons repeated with exactly the same area inside the same county have
* their area split evenly across the repetitions.
egen overlapping_polygons = count(CLIMDIV), by(stateicpsr countyicpsr40 countyname POLY_AREA)
replace POLY_AREA = POLY_AREA / overlapping_polygons

* Each polygon's share of the summed (adjusted) polygon area of its 1940 county
egen     area     = total(POLY_AREA), by(stateicpsr countyicpsr40 countyname)
generate weightCD = POLY_AREA / area

keeporder stateicpsr countyicpsr40 CLIMDIV weightCD

label variable stateicpsr    "State code (ICPSR)"
label variable countyicpsr40 "County code (ICPSR 1940)"
label variable CLIMDIV      "Climatic division (NCDC)"
label variable weightCD     "Weight: (area intersection in climatic division / 1940 county area)"

save "data/Bridge-CLIMDIV-county1940", replace
 
********************************************************************************
****    3. Service Command 7                                                ****
********************************************************************************

* Attribute table goes to tmp/ (erased in the clean-up section); coordinates are kept in data/.
shp2dta using "rawdata/maps/ServiceCommand7.shp", database("tmp/ServiceCommand7") coordinates("data/C-ServiceCommand7") replace


********************************************************************************
****    4. Erase junk                                                       ****
********************************************************************************
* tmp/USA_WGS84.dta is not written anywhere in this script, so a plain erase
* would stop the run with a "file not found" error. Remove it only if it
* happens to be there.
capture erase "tmp/USA_WGS84.dta"
erase "tmp/USA_CA.dta"

* Intermediate county intersections (attribute tables and coordinates)
foreach year in 1910 1940 1950 {
	erase "tmp/CountiesND_`year'.dta"
	erase "tmp/C-CountiesND_`year'.dta"
} /* foreach year */

* Intermediate climatic-division intersections
erase "tmp/CountiesND_CD.dta"
erase "tmp/C-CountiesND_CD.dta"
erase "tmp/Counties1940_CD.dta"
erase "tmp/C-Counties1940_CD.dta"

* Intermediate congressional-district intersections and the Service Command 7 table
erase "tmp/C-Bridge-ND-DISTRICT_ID62.dta"
erase "tmp/Bridge-ND-DISTRICT_ID62.dta"
erase "tmp/C-Bridge-ND-DISTRICT_ID73.dta"
erase "tmp/Bridge-ND-DISTRICT_ID73.dta"
erase "tmp/ServiceCommand7.dta"

* rmdir only succeeds on an empty folder, so it also checks that nothing was left behind.
rmdir "tmp/"

log close
exit